package com.blog.spark.utils

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import scala.util.control.NonFatal

/**
  * Utility that converts a raw access-log line (input format) into a Spark SQL
  * [[org.apache.spark.sql.Row]] matching the output schema defined below.
  *
  * @author yuyon26@126.com
  * @date 2018/10/5 18:01
  */
object AccessConvertUtils {

  /** Output schema: every column is kept as a string for downstream DataFrame creation. */
  val struct: StructType = StructType(
    Array(
      StructField("ip", StringType),
      StructField("area", StringType),
      StructField("time", StringType),
      StructField("day", StringType),
      StructField("traffic", StringType),
      StructField("browser", StringType),
      StructField("os", StringType)
    )
  )

  /**
    * Converts one access-log line into a [[Row]] shaped like [[struct]].
    *
    * Input example:
    * 123.147.250.68	2018-09-14 10:12:33	GET	http://www.bblog.vip/	HTTP/1.1	200	649	-	MSIE	Windows
    *
    * @param log raw access-log line
    * @return a 7-field [[Row]] on success; the single-field sentinel `Row(null)` when
    *         the line is blank/unparseable or the area/browser/OS is unknown.
    *         NOTE(review): the sentinel has 1 field while [[struct]] declares 7 —
    *         callers must filter these rows out before applying the schema; confirm.
    */
  def parseLog(log: String): Row = {
    val oneLog: String = NginxStatOneLog.parseOneLog(log)
    if (StringUtils.isNotBlank(oneLog)) {
      val logs = oneLog.split("\t")
      try {
        val ip = logs(0)
        // Strip spaces so region names compare cleanly against the sentinel below.
        val area = IPUtils.getCity(ip).replaceAll(" ", "")
        val time = logs(1)
        // "yyyy-MM-dd HH:mm:ss" -> "yyyyMMdd" day key.
        val day = time.substring(0, 10).replaceAll("-", "")
        val traffic = logs(2)
        val browser = logs(3)
        val os = logs(4)
        // Drop records whose geo lookup ("全球" = unresolved) or browser/OS is unknown.
        if (area == "全球" || browser == "Unknown" || os == "Unknown") {
          Row(null)
        } else {
          Row(ip, area, time, day, traffic, browser, os)
        }
      } catch {
        // NonFatal instead of Exception: never swallow fatal JVM errors
        // (OutOfMemoryError, InterruptedException, ...).
        case NonFatal(e) =>
          println(s"解析[${log}]失败")
          Row(null)
      }
    } else {
      Row(null)
    }
  }

}
